library(tidyverse)
## Registered S3 method overwritten by 'dplyr':
## method from
## as.data.frame.tbl_df tibble
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## ── Attaching packages ──────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.6
## ✔ tidyr 0.8.3 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ─────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Install viridisLite only when it is missing, so that re-running the script
# does not reinstall the package (and hit the network) every time.
if (!requireNamespace("viridisLite", quietly = TRUE)) {
  install.packages("viridisLite")
}
library(viridisLite)
# Print the data set and open its help page.
mpg
help("mpg")

# Scatterplot of hwy against displ.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy))

# Exercises ----
ggplot(mpg)
# 1. The graph is blank.
# 2. There are 234 rows.
# 3. drv describes whether the car is 4-wheel drive, front-wheel drive, or
#    rear-wheel drive.
# 4.
ggplot(mpg) +
  geom_point(aes(x = hwy, y = cyl))
# 5. The plot is not useful as it does not show any relationship between the
#    number of cylinders and highway miles per gallon.
# The same scatterplot with class mapped to four different aesthetics.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = class))
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, size = class))
## Warning: Using size for a discrete variable is not advised.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, alpha = class))
## Warning: Using alpha for a discrete variable is not advised.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, shape = class))
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 7.
## Consider specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).

# color is given outside aes() here, so it is a fixed setting for all points.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), color = "blue")

# Exercises ----
# Here "blue" sits inside aes(), so it is treated as a mapped value.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = "blue"))
# 1. The points are not blue because color = "blue" is written inside aes(),
#    so it is mapped like a variable instead of being set as the color of the
#    points.
# 2. Categorical variables in mpg include model, trans, drv, manufacturer, fl,
#    and class. Continuous variables include hwy, cty, year, cyl, and displ.
#    This information is given in the table heading, where it lists the
#    variables as <int>, <chr>, and <dbl>.
ggplot(mpg) +
  geom_point(aes(x = displ, y = cty, color = cty, size = displ))
# 3. A continuous variable cannot be mapped to shape, yet for size and color
#    it produces a gradient of color and a spectrum of sizes. Compared to
#    continuous data, categorical data is cleaner in the sense that the
#    levels are discretely separated.
ggplot(mpg) +
  geom_point(aes(x = displ, y = cty, color = cty, size = cty))
# 4. When color and size are mapped to the same variable, it produces a single
#    combined gradient of size and color.
help("geom_point")
# 5. The stroke aesthetic modifies the width of the border.
ggplot(mpg) +
  geom_point(aes(x = displ, y = cty, color = displ < 5))
# 6. When an aesthetic is mapped to a condition, the graph is colored by
#    whether the condition (here displ < 5) holds for each point.
# One panel per class, wrapped into two rows.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(~ class, nrow = 2)

# Grid of panels: drv in rows, cyl in columns.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(drv ~ cyl)

# Exercises ----
# 1. When the facet is placed on a continuous variable, it gives a graph that
#    is subsetted into categories of drive and cylinders.
ggplot(mpg) +
  geom_point(aes(x = drv, y = cyl)) +
  facet_grid(drv ~ cyl)
# 2. The empty plots are combinations of drv and cyl that have no
#    observations.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(drv ~ .)
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(. ~ cyl)
# 3. The dot represents an axis, so if it takes the x position the non-dot
#    variable is laid out across the y-axis, and vice versa.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(~ class, nrow = 2)
# 4. Using the facet subsets the data already, making it easy to determine
#    patterns in specific categories. A disadvantage is that it does not
#    (TODO: this sentence was left unfinished.)
help("facet_wrap")
help("facet_grid")
# 5. nrow determines the number of rows and ncol determines the number of
#    columns. facet_grid() does not have nrow or ncol arguments because the
#    columns and rows are defined by the faceting variables.
# 6. Using facet_grid() for a variable with unique levels creates a graph that
#    incorporates each level into a separate subset. This would make
#    connections much clearer.
# The same data drawn with a point geom and with a smooth geom.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy))
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# One smooth line per drv, told apart by linetype, group, or color.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy, linetype = drv))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy, group = drv))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy, color = drv), show.legend = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Two layers, with the mapping repeated in each layer.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  geom_smooth(aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Two layers sharing one mapping declared in ggplot().
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# A layer-level mapping adds color to the points only.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Layer-level data: the smooth is fitted to the subcompact rows only.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(se = FALSE, data = filter(mpg, class == "subcompact"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Exercises ----
help("geom")
## No documentation for 'geom' in specified packages and libraries:
## you could try '??geom'
# 1. geom_smooth for a line chart, geom_boxplot for a boxplot, geom_histogram
#    for a histogram, and geom_area for an area chart.
# 2. The chart will have points and lines that are colored by the drv
#    variable, across the displ vs hwy variables.
ggplot(mpg, aes(x = displ, y = hwy, color = drv)) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# 3. show.legend = FALSE turns off the legend produced in graphs. If it were
#    removed, the legend would be shown on the graph.
help("geom_smooth")
# 4. The se argument displays the confidence interval around the line.
# 5. The graphs will not be different, as they both take the same data and
#    push it through the same aesthetic arguments and variables, just through
#    different methods.
# Mapping declared once in ggplot():
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Data and mapping repeated in every layer:
ggplot() +
  geom_point(aes(x = displ, y = hwy), data = mpg) +
  geom_smooth(aes(x = displ, y = hwy), data = mpg)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# 6.
ggplot() +
  geom_point(aes(x = displ, y = hwy), data = mpg) +
  geom_smooth(aes(x = displ, y = hwy), data = mpg, se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot() +
  geom_point(aes(x = displ, y = hwy, color = drv), data = mpg) +
  geom_smooth(aes(x = displ, y = hwy), data = mpg, se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot() +
  geom_point(aes(x = displ, y = hwy, color = drv), data = mpg) +
  geom_smooth(
    aes(x = displ, y = hwy, group = drv, color = drv),
    data = mpg,
    se = FALSE
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot() +
  geom_point(aes(x = displ, y = hwy, color = drv), data = mpg) +
  geom_smooth(aes(x = displ, y = hwy, linetype = drv), data = mpg, se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot() +
  geom_point(aes(x = displ, y = hwy, color = drv), data = mpg)
# The same bar chart written with geom_bar() and with stat_count().
ggplot(diamonds) +
  geom_bar(aes(x = cut))
ggplot(diamonds) +
  stat_count(aes(x = cut))

# Bar heights taken directly from a freq column (stat = "identity").
demo <- tribble(
  ~cut,        ~freq,
  "Fair",       1610,
  "Good",       4906,
  "Very Good", 12082,
  "Premium",   13791,
  "Ideal",     21551
)
ggplot(demo) +
  geom_bar(aes(x = cut, y = freq), stat = "identity")

# Proportions instead of counts.
# NOTE: newer ggplot2 releases spell the computed variable after_stat(prop);
# the ..prop.. form is kept to match the ggplot2 version attached above.
ggplot(diamonds) +
  geom_bar(aes(x = cut, y = ..prop.., group = 1))

# Summarise depth per cut with its min, median and max.
# NOTE: ggplot2 >= 3.3.0 renames fun.y / fun.ymin / fun.ymax to
# fun / fun.min / fun.max; the old names are kept to match the ggplot2
# version attached above.
ggplot(diamonds) +
  stat_summary(
    aes(x = cut, y = depth),
    fun.y = median,
    fun.ymin = min,
    fun.ymax = max
  )

# Exercises ----
help("stat_summary")
# 1. stat_summary is associated with geom_boxplot, but one can modify it by
#    adding the argument "geom='graph'".
#    NOTE(review): the stat_summary() help page lists geom = "pointrange" as
#    the default - confirm this answer against it.
help("geom_col")
# 2. geom_col creates a type of bar chart whose heights represent values in
#    the data, compared to geom_bar, whose heights are proportional to the
#    number of cases.
# 3. Where do I find this list of geoms and stats?
help("stat_smooth")
# 4. stat_smooth() computes the conditional means the same as geom_smooth();
#    however, it can display the calculations with another geom.
# 5. The cause of the error is that the graph does not have a set of variables
#    for the y-axis.
ggplot(diamonds) +
  geom_bar(aes(x = cut, y = ..prop..))
ggplot(diamonds) +
  geom_bar(aes(x = cut, y = ..prop.., fill = color, group = 1))
# Outline color versus fill color on a bar chart.
ggplot(diamonds) +
  geom_bar(aes(x = cut, colour = cut))
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = cut))

# Filling by a second variable stacks the bars by default.
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity))

# position = "identity": bars are drawn where they fall and overlap, so they
# are made translucent or hollow to stay readable.
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "identity", alpha = 1 / 5)
ggplot(diamonds, aes(x = cut, colour = clarity)) +
  geom_bar(position = "identity", fill = NA)

# position = "fill" and position = "dodge".
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity), position = "fill")
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity), position = "dodge")

# position = "jitter" on a scatterplot.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), position = "jitter")
# 1. The issue with this plot is that it is a bit misleading, as the points
#    are stacked on top of one another (overplotting).
# The original call passed a misspelled `poisition = "count"` to ggplot(),
# which has no position argument, so it had no effect on the points.
# geom_count() sizes each point by the number of observations at that
# (cty, hwy) location, which makes the overlap visible.
ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_count()
# 2. geom_jitter is controlled by the width and height arguments.
help("geom_jitter")
help("geom_count")
# 3. geom_jitter adds a small amount of random variation to each point, while
#    geom_count counts the number of observations for each possibility and
#    then maps it to the point area.
help("geom_boxplot")
# 4. The default position for geom_boxplot is "dodge2", which preserves the
#    vertical position but adjusts the horizontal spacing. This shows in that
#    the box plots can overlap along the y-axis, but not along the x-axis.
ggplot(mpg, aes(x = drv, y = hwy)) +
  geom_boxplot()
# Boxplots of hwy per class, then the same plot with the axes flipped.
ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot()
ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot() +
  coord_flip()

# Map outline from map_data("nz"), without and with coord_quickmap().
nz <- map_data("nz")
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
ggplot(nz, aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "black", fill = "white")
ggplot(nz, aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "black", fill = "white") +
  coord_quickmap()

# Bar chart of cut, reused with flipped and polar coordinates.
bar <- ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = cut), width = 1, show.legend = FALSE) +
  theme(aspect.ratio = 1) +
  labs(x = NULL, y = NULL)
bar + coord_flip()
bar + coord_polar()
# Exercises ----
# 1.
bar <- ggplot(mpg) +
  geom_bar(aes(x = class, fill = class), width = 1, show.legend = FALSE) +
  theme(aspect.ratio = 1) +
  labs(x = NULL, y = NULL)
bar + coord_polar()
help("labs")
# 2. labs() creates the labels for the graph.
help("coord_quickmap")
help("coord_map")
# 3. The difference between coord_quickmap and coord_map is that quickmap is
#    an approximation that preserves straight lines, while map projects a
#    portion of the earth onto a flat plane.
help("geom_abline")
# 4. The plot below indicates that cty and hwy are positively correlated.
#    coord_fixed() is important as it fixes the ratio between the units on the
#    x and y axes. geom_abline() creates a reference line that helps
#    illustrate a common trend in the graph.
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point() +
  geom_abline() +
  coord_fixed()
# Section 3.10 ----
ggplot(data = ) +